# 160713 - script to blast OTUs against NCBI / BOLD
# Work relative to the project's taxonomy directory.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
setwd("~/Documents/UNI_und_VORLESUNGEN/11 phd projects/1 Meta SCHMALNAU/2 HiSeq biomass 160707/8 taxonomy")

# Inputs and output, all relative to the working directory set above.
outputname <- "OTU_BPsort_tax_NEW.csv"   # CSV written at the end of the script
hittable <- "../7 OTUs/F) hit_tab+.csv"  # OTU hit table
folder <- "1 BOLD_data"                  # directory searched for "_hacked.txt" tables

# Load the OTU hit table, keeping text columns as character vectors.
data <- read.csv(file = hittable, stringsAsFactors = FALSE)

# Collect the first row of every "*_hacked.txt" table found in `folder`,
# tagged with the OTU name parsed from the file name.
# The dot in the pattern is escaped so it only matches a literal ".".
table_list <- list.files(folder, full.names = TRUE, pattern = "_hacked\\.txt")

# Fail early with a clear message instead of looping over c(1, 0) and
# handing NA to read.table() when no table is present.
if (length(table_list) == 0) {
  stop("no '_hacked.txt' tables found in '", folder, "'", call. = FALSE)
}

# Build one single-row data frame per file, then bind them all at once
# (avoids re-copying the growing table on every iteration).
first_rows <- lapply(table_list, function(path) {
  bold_table <- read.table(path, stringsAsFactors = FALSE)
  otu <- sub(".+(OTU_.*)_hacked.*", "\\1", path)
  # Only the first row of each table is kept.
  cbind("OTU" = otu, bold_table[1, ])
})
temp <- do.call(rbind, first_rows)

temp <- data.frame(temp, stringsAsFactors = FALSE)

# Treat empty strings as missing values.
is_blank <- temp == ""
temp[is_blank] <- NA

# Wherever a species entry is present, copy it into the Genus column.
has_species <- !is.na(temp$Species)
temp$Genus[has_species] <- temp$Species[has_species]

# Drop columns 5, 6, 9 and 11; the first two remaining columns become the
# merge key ("ID") and the BOLD identifier ("BOLD_ID").
keep_cols <- setdiff(seq_along(temp), c(5, 6, 9, 11))
temp2 <- temp[, keep_cols]
names(temp2)[1:2] <- c("ID", "BOLD_ID")

# Merge the OTU hit table with the BOLD identification table on "ID",
# keeping rows that occur in only one of the two tables (all = TRUE).
# Both key arguments are spelled out: the former `bx.x` was a typo that
# merge() silently ignored, so the key for `data` fell back to every column
# name shared with `temp2`.
meep <- merge(data, temp2, by.x = "ID", by.y = "ID", all = TRUE)

# Optional sanity checks for interactive use:
#nrow(meep)
#nrow(temp2)
#nrow(data)
#head(temp2)
#head(data)
#head(meep)

# Prepend the numeric part of the OTU ID as column "X" so rows sort
# numerically rather than alphabetically.
meep <- cbind("X" = as.numeric(sub("OTU_", "", meep$ID)), meep)
meep <- meep[order(meep$X), ]

nrow(meep)
head(meep)

# Column names as a one-column matrix; the row labels give the column positions.
cbind(names(meep))


# Read the OTU sequences as plain strings (seqonly = TRUE).
library(seqinr)
sequ <- read.fasta(file = "../7 OTUs/D) OTU_KEEP.txt", seqonly = TRUE)

# Combine lane 1 + 2: columns 3-42 and 43-82 are added element-wise, so the
# two ranges must list the same samples in the same order.
id_cols <- meep[1:2]
lane_sum <- meep[3:42] + meep[43:82]
tail_cols <- meep[83:88]

# NOTE(review): sequences are attached purely by position -- this assumes the
# FASTA file lists the OTUs in the same order as the sorted rows of `meep`
# and contains exactly one sequence per row; confirm against the input files.
meep2 <- cbind(id_cols, lane_sum, "sequ" = unlist(sequ), tail_cols)

# The summed columns inherit the lane-1 names; strip the lane tag from them.
names(meep2) <- sub("L001_", "", names(meep2))
head(meep2)


# Reorder the 40 sample columns (positions 3-42 of meep2) into eight groups
# of five; columns 1-2 and 43-49 keep their places.
cbind(names(meep2))
sample_order <- c(
  3, 5, 7, 9, 11,       23, 25, 27, 29, 31,
  4, 6, 8, 10, 12,      33, 35, 37, 39, 41,
  13, 15, 17, 19, 21,   24, 26, 28, 30, 32,
  14, 16, 18, 20, 22,   34, 36, 38, 40, 42
)
meep3 <- meep2[c(1, 2, sample_order, 43:49)]

cbind(names(meep3))

# Append a sample label to each of the 40 sample column names; the ten labels
# are recycled four times across the columns.
meep4 <- meep3
samples <- c("S8", "M8", "L8", "gew8", "N8", "S10", "M10", "L10", "gew10", "N10")
# Index the names vector, not the data frame: `names(meep4[3:42]) <- ...`
# renames a temporary copy of the subset and leaves meep4's names unchanged.
names(meep4)[3:42] <- paste(names(meep3)[3:42], samples, sep = "_")

head(meep4, 20)

write.csv(meep4, file = outputname, row.names = FALSE)






